Introduction

For this analysis we are using the Climate Change: Earth Surface Temperature Data from kaggle. This data set can be found here: https://www.kaggle.com/berkeleyearth/climate-change-earth-surface-temperature-data#GlobalLandTemperaturesByCountry.csv

The goal is to show how the fxtract package can support climate and economical analysis.

Load required packages

library(fxtract)
library(lubridate)
library(tidyverse)
library(stringr)

Read in data

df_glob_temp = read.csv("GlobalLandTemperaturesByCountry.csv")
str(df_glob_temp)
## 'data.frame':    577462 obs. of  4 variables:
##  $ dt                           : Factor w/ 3239 levels "1743-11-01","1743-12-01",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ AverageTemperature           : num  4.38 NA NA NA NA ...
##  $ AverageTemperatureUncertainty: num  2.29 NA NA NA NA ...
##  $ Country                      : Factor w/ 243 levels "Ã…land","Afghanistan",..: 1 1 1 1 1 1 1 1 1 1 ...

Data preprocessing

For our analysis we consider only years since 1900. The main reason for this is that in recent years we have had many missing values for some countries.

df_glob_temp$dt = as.Date(df_glob_temp$dt)
df1900_raw = df_glob_temp %>% filter(dt >= "1900-01-01")

Removal of duplicates and some smaller regions.

df1900 = df1900_raw %>% filter(!Country %in% c('Denmark', 'Antarctica', 'France', 'Europe', 'Netherlands',
  'United Kingdom', 'South America', 'Ã…land', 'Africa',  'Asia', 'Baker Island', 'Curaçao', 'Kingman Reef', 'North America', 'Oceania', 
  'Palmyra Atoll', 'Saint Barthélemy', 'Saint Martin', 'Virgin Islands'))
df1900$Country = gsub("\\(Europe)", "", df1900$Country)

For an annual view we would like to have the year in a separate column.

df1900$year = year(df1900$dt)

fxtract

Create our user defined-functions

temp_stats = function(data) {
  allYears = unique(data$year)
  data = data %>% group_by(year) %>%
    summarise(
      mean = mean(AverageTemperature, na.rm = T),
      sd = sd(AverageTemperature, na.rm = T),
      min = min(AverageTemperature, na.rm = T),
      max = max(AverageTemperature, na.rm = T)
    )
  data = data %>% as.data.frame()
  res = c(mean = data$mean,
     sd = data$sd,
     min = data$min,
     max = data$max)
  allYears = unique(df1900$year)
  newnames = c(paste0("mean_", allYears), 
    paste0("sd_", allYears), 
    paste0("min_", allYears), 
    paste0("max_", allYears)) 
  names(res) = newnames
  res
}

Setup and calculation

xtractor = Xtractor$new("xtractor")
xtractor$n_cores = 2
xtractor$add_data(df1900, group_by = "Country")
xtractor$add_feature(temp_stats)
xtractor$calc_features()

Results

library(knitr)
library(kableExtra)
res = xtractor$results %>% gather(key = "key", value = "value", -Country) %>% 
  separate(key, c("key", "year")) %>% select(Country, year, key, value) %>% 
  as.data.frame()
res$value[is.infinite(res$value)] <- NA 
res %>% slice(1:20) %>% kable(col.names = c("Country", "Year", "Key", "Value")) %>% 
    kable_styling() %>%
    scroll_box(width = "100%", height = "400px")
Country Year Key Value
Afghanistan 1900 mean 13.749333
Albania 1900 mean 13.068583
Algeria 1900 mean 22.864167
American Samoa 1900 mean 26.273500
Andorra 1900 mean 11.348333
Angola 1900 mean 21.789917
Anguilla 1900 mean 26.406750
Antigua And Barbuda 1900 mean 26.229917
Argentina 1900 mean 14.807250
Armenia 1900 mean 8.243750
Aruba 1900 mean 28.027917
Australia 1900 mean 21.766583
Austria 1900 mean 6.670250
Azerbaijan 1900 mean 10.582500
Bahamas 1900 mean 25.057417
Bahrain 1900 mean 25.899250
Bangladesh 1900 mean 25.219083
Barbados 1900 mean 26.279167
Belarus 1900 mean 5.820000
Belgium 1900 mean 9.787333

Visualization

Linear Regression with fxtract

User defined functions

limo_slope = function(data){
  lin_model = lm(temp ~ year, data = data)
  slope = lin_model$coefficients[[2]]
  c("slope" = slope)
}

Setup and calculation

df = res %>% filter(key == "mean") %>% select(temp = value, everything())
df$year = as.numeric(df$year)
xtractor2 = Xtractor$new("xtractor2")
xtractor2$n_cores = 2
xtractor2$add_data(df, group_by = "Country")
xtractor2$add_feature(limo_slope)
xtractor2$calc_features()
res2 = xtractor2$results

Results

Country Slope
Afghanistan 0.0145613
Albania 0.0080569
Algeria 0.0125315
American Samoa 0.0095860
Andorra 0.0117471
Angola 0.0089852
Anguilla 0.0107910
Antigua And Barbuda 0.0109234
Argentina 0.0083684
Armenia 0.0143635
Aruba 0.0098219
Australia 0.0087285
Austria 0.0120316
Azerbaijan 0.0156783
Bahamas 0.0091516
Bahrain 0.0137320
Bangladesh 0.0071229
Barbados 0.0111324
Belarus 0.0138405
Belgium 0.0105878



Visualization

For a better overview, we can then plot our results in a map.